clear all;

% This is the master file for Del Boca, Flinn, Wiswall and Verriest (2025).
% This file calls all other m-files, and has comments throughout the code.
% Any further questions can be directed at Ewout Verriest, preferably via
% email (ewoutverriest@gmail.com).

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%% Set directories
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

% change this at will if running from a different computer
main_dir = 'C:\Users\ejv5165\OneDrive - The Pennsylvania State University\Research\DFVW\replication package\'; 
% Note: this folder should have two subfolders on your hard drive, called 'paper' and 'matlab'

paper_dir = [main_dir,'paper\']; 
% Note: this folder is initially empty, and - after running the code - will later contain all figures and LaTeX tables for the final paper
% In order to generate all tables and figures, you will need to turn on the
% following toggles below (see "Choose what to do")
% 1) Set 'fp.simulate' equal to 1 to simulate the model and run 'write_output_paper.m' after that
% 2) Set 'fp.compstat_CCT' equal to 1 to run the first counterfactual exercise and generate the corresponding output table
% 3) Set 'fp.compstat_SES' equal to 1 to run the second counterfactual exercise and generate the corresponding output table

matlab_dir = [main_dir,'matlab\']; 
% Note: this folder should have three subfolders on your hard drive, called 'data', 'mfiles' and 'output'

data_dir = [matlab_dir,'data\'];
% Note: this subfolder should contain all the necessary data to run the code:
% 1) a file called 'child_data_PSID' which contains the cleaned PSID-CDS household panel data
% 2) a file called 'osaka_2010_final' which contains the cleaned survey data on adults' discount factors, from the 2010 Osaka Preference Parameter Survey
% 3) a file called 'steinberg_data_nomiss', which contains the survey data on children's and adolescents' discount factors, from the Steinberg et al. (2009) study.

mfiles_dir = [matlab_dir,'mfiles\']; 
% Note: this folder should contain all the Matlab .m-files which are called
% upon at some point by this master file.

output_dir = [matlab_dir,'output\'];
% This output folder will save some temporary data files (e.g. parameter
% estimates, bootstrap results, MSM weighting matrices, etc.)

% Set final directory where we save all output, and where we load estimates from
cd(output_dir);

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%% Choose what to do
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

fp.do_beta_draws = 1; % default: 1
% This draws initial latent k variables from beta distribution.
% This step only needs to be done once, then can be set to 0 to save time.

fp.do_boot = 1; % default: 1
% This (re)calculates MoM weighting matrix, activate this whenever you
% add/modify the set of moments in build_moments.m
% This step only needs to be done once, then can be set to 0 to save time.

% ESTIMATION OF MAIN MODEL
% Run the estimation of the parameters or not?
fp.estimate = 0; % 1 if yes, 0 if no
% If set to 1, it starts the Method of Simulated Moments routine

% Note: to turn off/on the estimation of the exogenous non-labor income
% process, go to "run_income_process.m" and turn it on/off there

% SIMULATION
% Simulate data at the latest set of parameter estimates or not?
fp.simulate = 1; % set this equal to 0 to skip the simulation and go immediately to e.g. the policy code

% Calculate Standard Errors of model parameters using bootstrap?
fp.do_boot_se = 0; % 1 if yes, 0 if no

fp.doing_bootstrap_se = 0; % KEEP THIS FIXED AT 0 HERE
fp.doing_bootstrap_se_NLI = 0; % KEEP THIS FIXED AT 0 HERE
% see below for optimization settings (fp.options_est_BSE)

% Comparative Statics exercise 1:
fp.compstat_CCT = 1; % set 0 to turn off, 1 to turn on (see end of code)

% Take a given set of estimates, simulate a complete data set under
% (0) the baseline model with CCT cost + endogenous child beta
% (1) infinite CCT cost: kappa = infinity
% (2) no effect of CCT on patience: b2_up = b3_up = 0
% (3) no CCT cost and no effect of CCT on patience: kappa = b2_up = b3_up = 0

% Comparative Statics exercise 2:
fp.compstat_SES = 1; % set 0 to turn off, 1 to turn on (see end of code)

% DON'T CHANGE THIS HERE- SET THEM EQUAL TO 0 IN BASELINE
fp.compstat_age_homwages = 0;   % KEEP FIXED
fp.compstat_SES_homwages = 0;   % KEEP FIXED
fp.compstat_SES_homtech = 0;    % KEEP FIXED
fp.compstat_SES_homprefs = 0;   % KEEP FIXED

% Take a given set of estimates, simulate a complete data set under
% (1) alternative model where we shut down all heterogeneity
% (2) alternative model where we allow heterogeneity in betas
% (3) alternative model where we allow heterogeneity in betas and tech
% (4) alternative model where we allow heterogeneity in betas, tech and
% wages (i.e. baseline model).

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%% Model parameters
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

% Define upper and lower bounds on the parameters governing the TFP process
fp.L0_min = 0;
fp.L0_max = 10; % 1.2
fp.L_min = 0;
fp.L_max = 10; % 1.2
fp.k_min = -4;
fp.k_max = 4;
fp.t0_min = -20;
fp.t0_max = 20;

% Below, we define the parents' beta_p grid based on Osaka data.
% Then, the child's betas will be defined on a different grid (based on the Steinberg et al. data)

% Set number of grid points for discount factor distribution
% Keep the same number for parents and child betas
fp.NN_p = 3; % number of possible levels of beta_p
fp.NN_c = 3; % number of possible levels of beta_c (also called Z in the paper)

% For parents: pick a number of possible education levels for the conditional distribution
fp.N_educ_betap = 3;
% For children, assume same number of SES categories
fp.N_educ_betac = fp.N_educ_betap;

% Define grid for child cognitive human capital process
fp.KK = 20; % number of grid points
p_min = 0.01; % minimal probability of answering a question correctly
p_max = 1-p_min; % maximal probability of answering a question correctly
% Note: raw Letter-Word test scores range from 1-57

% Set intercept and slope of the latent capital conversion process (see sim_choices.m and see also paper Appendix D.1.5 for more details)
fp.kappa0 = log(p_min) - log(1-p_min); % this ensures that ln(k_min) = 0
fp.kappa1 = 1;

% Define probability grid for the beta/binomial distribution:
% Create an even-spaced grid of log-k values, and then convert to a proper grid of probability p values:
ln_k_min = 0;
ln_k_max = 2*(log(p_max)-log(p_min)); % this follows from symmetry and the assumption that log(k_min) = 0 for lowest grid point + the assumption that kappa1 = 1 (we can either choose k_max OR kappa1, not both)
ln_k_grid = linspace(ln_k_min,ln_k_max,fp.KK)'; % creates an evenly spaced grid, makes it a column vector
fp.kgrid = exp(ln_k_grid); % must be a column vector

% Fix the altruism assumption (parameter phi)
fp.varphi = 1/3; % choose a value of phi between 0 and 1

fp.M = 16; % number of periods we consider (1-16)
fp.TT_p = 7*16; % weekly time endowment of parents (after sleep)
% For kids, we subtract the median school time for every age (starting from
% an endowment of 112 hours, after sleep).
% Since we have exactly 17 data rows per kid (ages 0-16), we can predefine the
% vector of time endowments (defined in the paper as T_tilde):
fp.TT_c = NaN*ones(fp.M+1,1);
% We will fill this out after reading in the data (see below)

fp.test_max = 57; % maximum Letter Word Score (raw)

fp.B = 200; % number of draws to calculate the weighting matrix
fp.B_SE = 50; % number of rounds for bootstrapping the SEs

% Number of simulations per household
fp.R = 10;

% Options for the Stackelberg solution solver (fminunc)
fp.options_solution = optimset('LargeScale','off', 'TolFun', 0.0001, 'TolX', 0.0001, ...
    'Display','off', 'MaxIter',1e10, 'MaxFunEvals',10000);

% Options for the sim_data_betac function
fp.options_solution2 = optimset('LargeScale','off', 'TolFun', 1e-8, 'TolX', 1e-8, ...
    'Display','iter', 'MaxIter',1e10, 'MaxFunEvals',10000);

% Options for the Stackelberg FOC system solver (fsolve)
fp.options_fsolve = optimoptions('fsolve','Display','off','TolFun', 0.0001, 'TolX', 0.0001, ...
    'Display','off', 'MaxIter',1e10, 'MaxFunEvals',10000);

% Options for the estimation algorithm of the Non-labor income process
fp.options_est_NLI = optimset('LargeScale','off','TolFun', 0.001, 'TolX', 0.001, ...
    'Display','iter', 'MaxIter',1e10, 'MaxFunEvals',3000);

% Options for the estimation (fminsearch)
fp.options_est = optimset('LargeScale','off','TolFun', 1, 'TolX', 1, ...
    'Display','iter', 'MaxIter',1e10, 'MaxFunEvals',1000);

% Options for the Bootstrap Standard Errors (BSE) estimation (fminsearch):
fp.options_est_BSE = optimset('LargeScale','off','TolFun', 1, 'TolX', 1, ...
    'Display','iter', 'MaxIter',1e10, 'MaxFunEvals',1000);

% Optimization options--only applies to our edited simplex routine (fminsearch_matt)
global usual_delta % alternative parameter used in fminsearch_matt
usual_delta = 0.25;
% usual_delta = 0.01;


%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%% Load PSID-CDS data
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

% Load the cleaned panel data set from PSID-CDS
data = dlmread([data_dir,'child_data_PSID.txt'],'\t');

% Replace missing data with nan
data(data == -9) = NaN;

% Data parameters
[fp.N_long,temp] = size(data); %total N x T observations (247 x 17)

% Data indices
col_long.id = 1; % household identifier
col_long.year = 2; % calendar year (minus 1900)
col_long.numb_child = 3; % number of children in the household
col_long.mom_age = 4; % mother's age
col_long.dad_age = 5; % father's age
col_long.fam_size = 6; % family size
col_long.mom_educ = 7; % mother's years of schooling
col_long.mom_hours = 8; % mother's weekly hours of labor
col_long.mom_wage = 9; % mother's hourly wage
col_long.dad_hours = 10; % father's weekly hours of labor
col_long.dad_wage = 11; % father's hourly wage
col_long.nonlabor_income = 12; % household's weekly non-labor income
col_long.age = 13; % child's age
col_long.lw_raw = 14; % Letter Word raw score
col_long.dad_educ = 15; % father's years of schooling
col_long.tau12 = 16; % mom and dad joint active time (will be allocated later)
col_long.tau1 = 17; % mom active, dad passive or away
col_long.tau2 = 18; % dad active, mom passive or away
col_long.tau_c = 19; % child productive time (neither parent active)
col_long.tau_school = 20; % actual observed school time for each child (regular and other/irregular)
col_long.T_tilde = 21; % this is the child's corrected time endowment (T - median_tau_school), where the median is defined for each age-group
% Note: T_tilde + tau_school is not necessarily equal to 112, since we're using the median across all children of a given age.

nvar = 21;

% Load CDS allowance data
% questions asked to the child:
col_long.allowance_A = nvar+1;
col_long.allowance_B = nvar+2;
col_long.allowance_C = nvar+3;
col_long.allowance_D = nvar+4;
% questions asked to the parents:
col_long.allowance_A2 = nvar+5;
col_long.allowance_B2 = nvar+6;
col_long.allowance_C2 = nvar+7;
col_long.allowance_D2 = nvar+8;
col_long.inc_cat = nvar+9;
col_long.CCT = nvar+10; % dummy 0/1 indicating whether household uses an internal CCT (i.e. a conditional allowance) in this period or not (see sim_funct.m)

nr_vars = nvar+10;

fp.nr_obs_vars = nr_vars; % use this for censoring purposes (see build_obj.m). We shouldn't censor variables below so we can build simulated moments

%For estimation purposes, we also create a variable for the child's age in
%year 1997, which is a constant for every child-observation (see below)
col_long.child_age97 = nr_vars+1; % child's age in 1997
% Extra columns for which we don't have actual data:
col_long.V_p = nr_vars+2; % parent's value function
col_long.k_tp1 = nr_vars+3; % latent child capital in t+1 - note: this will generally be OFF THE GRID (see sim_choices.m)
col_long.e = nr_vars+4;
col_long.c = nr_vars+5;
col_long.x = nr_vars+6; % child's consumption
col_long.b = nr_vars+7; % child's base consumption - intercept of reward function
col_long.r = nr_vars+8; % child's wage rate - slope of reward function
col_long.u_p = nr_vars+9; % parents' selfish instantaneous utility
col_long.u_p_tilde = nr_vars+10; % parent's altruistic instantaneous utility
col_long.u_c = nr_vars+11; % child's instantaneous utility
col_long.l1 = nr_vars+12; % mom leisure
col_long.l2 = nr_vars+13; % dad leisure
col_long.lc = nr_vars+14; % child leisure
col_long.Y = nr_vars+15; % total household income
col_long.k = nr_vars+16; % latent child capital
col_long.omega = nr_vars+17; % CCT utility cost omega_t for each household
col_long.u_p_17 = nr_vars+18; % terminal utility at age 17
col_long.u_c_17 = nr_vars+19; % terminal utility at age 17
col_long.V_c = nr_vars+20; % child bellman value
col_long.n = nr_vars+21; % child patience capital (on grid)
col_long.n_tp1 = nr_vars+22; % future child patience capital
col_long.betac_17 = nr_vars+23; % child's discount factor in final period
col_long.log_k_17 = nr_vars+24; %child's final period log skills (on grid)

fp.tot_cols = col_long.log_k_17; %total number of variables - UPDATE WHENEVER ADDING NEW VARIABLES!

%number of households/children (remember we select only one child per hh)
fp.N = max(data(:,col_long.id));

fp.min_age = 3; % youngest possible child age when LW is first observed in 1997 (since we only observe LW if age >= 3)
fp.max_age = 16; % exogenously imposed; this is the final age for which we use data

% Create the new variable child_age97 (could also be done in Stata before importing data)
for i = 1:fp.N
    rows_i = data(:,col_long.id) == i;
    data_i = data(rows_i,:);
    row97 = data_i(:,col_long.year) == 97;
    age97 = data_i(row97,col_long.age);
    data(rows_i,col_long.child_age97) = age97*ones(size(data_i,1),1); % fill out this entire row
end

% Create the adjusted child time endowments:
fp.TT_c = data(1:17,col_long.T_tilde);

% Allocate joint parental time between the two parents based on the relative ratio:
for i = 1:size(data,1)
    tau12 = data(i,col_long.tau12);
    tau1 = data(i,col_long.tau1);
    tau2 = data(i,col_long.tau2);
    if tau12>0 && ~isnan(tau12)
        if tau1 > 0 || tau2 > 0 % allocate proportionally
            data(i,col_long.tau1) = tau1 + (tau1/(tau1+tau2))*tau12; % new tau1
            data(i,col_long.tau2) = tau2 + (tau2/(tau1+tau2))*tau12; % new tau2
        elseif tau1 == 0 && tau2 == 0 % allocate 50/50
            data(i,col_long.tau1) = 0.5*tau12; % new tau1
            data(i,col_long.tau2) = 0.5*tau12; % new tau2
        end
    end
    data(i,col_long.tau12) = NaN; % replace missing
end

% Construct CCT "data" based on allowance questions from 2002 and 2007 CDS

% Recall: allowance_ABCD2 are the parental reports (more complete, reported for ages 6+) and
% allowance_ABCD are the child reports (only reported for ages 12+)
% In our baseline, we use the parental reports.

tmp_CCT = NaN(size(data,1),1);
% only look at 2002 + 2007 data when we observe conditional allowances
tmp0a = data(:,col_long.allowance_A2) == 0 & data(:,col_long.year) > 97; % question D might be missing here
tmp0b = data(:,col_long.allowance_A2) == 1 & data(:,col_long.allowance_D2) == 0 & data(:,col_long.year) > 97;
tmp1 = data(:,col_long.allowance_A2) == 1 & data(:,col_long.allowance_D2) == 1 & data(:,col_long.year) > 97;
% this excludes observations where A = 1 (i.e. they use allowance) but D is missing!
tmp_CCT(tmp0a==1)=0;
tmp_CCT(tmp0b==1)=0;
tmp_CCT(tmp1==1)=1;

data(:,col_long.CCT) = tmp_CCT; % assume this is how we "proxy" observe CCTs in the data

% See build_moments.m for how we construct CCT moments based on child age and parental education.

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%% Load and clean Osaka Preference Parameter Study data
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

% For parents, we allow discount factor to be correlated to education,
% and calibrate the conditional distribution exogenously based on Osaka Preference Parameter Survey data.

% Note: we added an if-loop that either runs the "data_load_osaka" file if
% the data file is accessible. If not (e.g. for replication purposes
% without requiring access to the Osaka survey data sets), then we just
% load the final output manually so the code can proceed.

filename = [data_dir,'osaka_2010_final.txt']; 
if exist(filename,'file') > 0 % data file exists

    [betap_pdf,betap_vec,osaka_moments_data] = data_load_osaka(fp.NN_p,fp.N_educ_betap,data_dir,paper_dir);
    % This function generates an NN x N_educ_betap matrix called "betap_dist"
    % which contains probabilities (summing up to 1 in each column, i.e. for each education
    % type), and a column vector of size NN x 1 "betap_vec" with the discrete
    % discount factor levels (the grid is fixed across all education types)
else
    % if the Osaka raw data file (i.e., the output from the STATA file
    % "master_osaka_betas.do) does not exist, we want the code to proceed
    % anyway. Our output only contains summary statistics; not
    % any individual identifiers from any of the Osaka survey respondents.
    % We also report these summary stats in the paper (e.g. see Appendix titled "Data
    % on parental discount factors"), so they can be retrieved from there
    % as well.

    disp('Raw Osaka data file is not available => loading final results only so code can proceed')
    % The three outputs of the "data_load_osaka" file are:
    % 1) 3 by 3 matrix of calibrated conditional probability distribution of falling into discount factor low/middle high (rows) conditional on own education level = low/middle/high (columns)
    betap_pdf = [0.3532   0.3007   0.2354 ;
                 0.2239   0.2794   0.2927 ;
                 0.4229   0.4199   0.4719];

    % 2) 3 by 1 vector of grid points for parental discount factor process
    betap_vec = [0.8469 ;
                 0.9467 ;
                 0.9902];

    % 3) 4 by 2 matrix of "data moments" for the parental discount factor
    % process, to be reported in the paper.
    % column 1 = averages, column 2 = standard deviations of adult discount factors
    % row 1 = high school or less, row 2 = some college or college degree,
    % row 3 = graduate degree, row 4 = all respondents
    osaka_moments_data = [0.9279    0.0710 ;
                          0.9352    0.0637 ;
                          0.9451    0.0567 ;
                          0.9356    0.0642];
end

% Define cdfs for each type
betap_cdf = NaN(fp.NN_p,fp.N_educ_betap); % beta levels x education levels
for jj = 1:fp.N_educ_betap
    betap_cdf(:,jj) = cumsum(betap_pdf(:,jj));
end

% Save output for later use
fp.osaka_moments_data = osaka_moments_data;
fp.betap_cdf = betap_cdf;
fp.betap_vec = betap_vec;
% disp('Parental discount factor distribution - cdf and pdf')
% disp(betap_cdf)
% disp(betap_pdf)

% Next, generate random draws of beta_p for every simulated household
rng('default')
betap_draws = rand(fp.N,fp.R); % random uniform numbers - change these when we bootstrap SEs
fp.beta_p_mat = sim_init_betap(data,fp,col_long,betap_draws);

% Calculating "SIMULATED" moments of beta_p draws by education category (ALL, hs, sc/coll, grad)
betap_moments_sim = NaN(fp.N_educ_betap+1, 2); % rows = by SES cat / all hh, columns = mean/stdev
betap_moments_sim(end,:) = [nanmean(fp.beta_p_mat(:)) nanstd(fp.beta_p_mat(:))];

% define 3 EDUC categories: high school or less / some college or college / graduate
% Note: for PSID/CDS data that means
educvec1 = [0 13 17]; % must have same dimensions as fp.N_educ_betap and fp.N_educ_betac!
educvec2 = [12 16 20];
% For Steinberg and Osaka data sets, respectively, these cutoffs would be different due to differing SES variable definitions!

for ii = 1:length(educvec1)
    tmprows = nan(fp.N,1);
    for j = 1:fp.N
        educ_j = data(1+(j-1)*(fp.M+1),col_long.dad_educ);
        tmprows(j,1) = educ_j>= educvec1(ii) & educ_j<= educvec2(ii);
    end
    tmpp = fp.beta_p_mat(tmprows==1,:);
    betap_moments_sim(ii,:) = [nanmean(tmpp(:)) nanstd(tmpp(:))];
end

fp.betap_moments_sim = betap_moments_sim; % save for write_output_paper.m, see Table 12 in paper
% Uncomment to see data vs. simulated moments of parental discount factor distribution
% disp('Osaka DATA moments (mean/std) by education category (low/middle/high/all hh):')
% osaka_moments_data
% disp('Parental beta_p SIM moments (mean/std) by education category (low/middle/high/all hh):')
% betap_moments_sim

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%% Load and clean Steinberg et al. data on children's discount factors
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

% For every simulated household, we need an initial draw for the
% child's patience capital. Note that the model starts at whatever age the
% child is in 1997, when we first observe the household in the CDS.

% In our baseline model, we define the child's initial discount factor as a
% random draw that is allowed to be correlated with the parent's education level

filename2 = [data_dir,'steinberg_data_nomiss.csv'];
if exist(filename2,'file') > 0 % data file exists

    [betac_pdf,betac_vec,steinberg_moments_data] = data_load_steinberg(fp.NN_c,fp.N_educ_betac,data_dir,paper_dir);
    
else
    % if the Steinberg raw data file does not exist, we want the code to proceed
    % anyway. The output of "data_load_steinberg" only contains summary statistics; not
    % any individual identifiers from any of the Steinberg survey respondents.
    % We also report these summary stats in the paper, so they can be
    % retrieved from there as well (see e.g. Table "Discount factors of parents and children- Data vs model simulation")

    disp('Raw Steinberg data file is not available => loading final results only so code can proceed')

    % The three key outputs are:
    % 1) 3 by 3 matrix of calibrated conditional probability distribution of falling into discount factor low/middle high (rows) conditional on parents education level = low/middle/high (columns)
    betac_pdf = [0.4030    0.3006    0.2432 ; 
                 0.3134    0.3468    0.3243 ; 
                 0.2836    0.3526    0.4324];

    % 2) 3 by 1 vector of grid points for children's discount factor process
    betac_vec = [0.3426 ;
                 0.7728 ; 
                 0.9817];

    % 3) 10 by 2 matrix of "data moments" for the parental discount factor
    % process, to be reported in the paper.
    % column 1 = averages, column 2 = standard deviations of child/adolescent discount factors
    % rows 1-4 = all children ages 10-17, parental education category = low/middle/high/all
    % rows 5-8 = children ages 10-12, parental education category = low/middle/high/all
    % rows 9-12 = children ages 13-17, parental education category = low/middle/high/all
    steinberg_moments_data = [0.6573    0.3037 ; 
                              0.7100    0.2927 ; 
                              0.7972    0.2286 ; 
                              0.6988    0.2933 ; 
                              0.6201    0.3246 ; 
                              0.6946    0.3130 ; 
                              0.7969    0.2105 ;
                              0.6797    0.3105 ;
                              0.6761    0.2927 ;
                              0.7204    0.2791 ;
                              0.7974    0.2436 ;
                              0.7103    0.2825];
end

% Save output for later use
fp.steinberg_moments_data = steinberg_moments_data;
fp.betac_vec = betac_vec;

% Invert child discount factors (defined betac = n/(1+n)) to find the stock of patience capital, n
fp.ngrid = betac_vec./(1-betac_vec); % make sure it is a column vector of size fp.NN_c x 1

% disp('Possible grid values that log(n_t) or n_t can take:')
% disp(log(fp.ngrid))
% disp(fp.ngrid)

% Uncomment to see data moments of child discount factor distribution
% disp('Steinberg data moments (mean/std/median) by education category (bottom row = all hh):')
% steinberg_moments_data

% Note: simulated child discount factors are endogenously determined in the
% model, so we cannot calculate those moments yet like we can for parents.

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%% Random number draws
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

% 1) draws for wife's wage, husband's wage, nonlabor income (epsilon shocks)
rand('state',940);
fp.wage_draws = norminv(rand(fp.N,fp.R,fp.M+1,3));
% yields standard normal numbers (between -inf and +inf)
% Now we center them around 0 and 1 (useful if R is small)
for b = 1:3
    temp_vec = fp.wage_draws(:,:,:,b);
    temp_vec = temp_vec(:);
    temp1 = mean(temp_vec);
    temp2 = std(temp_vec,1);
    fp.wage_draws(:,:,:,b) = (fp.wage_draws(:,:,:,b)-temp1)/temp2;
end

% Also draw some shocks for the non-labor income binary draw of I=0 or I>0 (logit: see NLI_sim_funct)
rand('state',1982);
fp.NLI_unif_draws = rand(fp.N,fp.R,fp.M+1,1);

% 2) uniform draws for child quality, based on assumption that the initial prior
% distribution of "p" (see also REStud section 3.3) is uniform over [0,1]
rand('state',1004);
fp.beta_draws = rand(fp.N,fp.R);

% 3) draws for test scores, see below
rand('state',1258);
fp.score_draws = rand(fp.N,fp.R,fp.M+1);

% 4) The model has a discrete markov process for beta_c (or patience
% capital n), so we need uniform draws for that to determine transitions
% from n_t to n_t+1, and for the initial draws conditional on parent SES
rand('state',48546);
fp.n_draws = rand(fp.N,fp.R,fp.M);

% Draw random uniform numbers to determine initial draws
rand('state',45616958);
fp.betac_init_unifdraws = rand(fp.N,fp.R);

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%% Pre-draw initial latent k level drawn from beta distribution
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

if fp.do_beta_draws == 1
    disp('Drawing initial latent k values - ONLY NEED TO DO THIS ONCE');

    % The function k = betainv used below gives the inverse of the beta cdf, i.e. for a beta cdf with given
    % parameters alpha and beta (which depend on the "observed" test score, k*), a value less than or equal to k will occur
    % with probability p = fp.beta_draws(i,r) = the prior.

    % Thus, temp_beta_draws_by_measure contains the latent child quality k
    % given an "observed" but noisy score of k_meas_here (= k*), assuming a uniform
    % prior p. This will be monotonically increasing in k*, and always be
    % between 0 and 1 (since this is just "latent" quality).
    % For every (i,r) iteration, fp.beta_draw(i,r) will be the uniform prior p
    % This p will be updated for every possible observed test score k*

    % beta distribution parameters: think of (alpha-1) as number of
    % successes, and (beta-1) as the number of failures

    temp_beta_draws_by_measure = NaN(fp.N,fp.R,fp.test_max);
    for i = 1:fp.N
        for r = 1:fp.R
            for k_meas_here = 1:1:fp.test_max
                alpha = 1 + k_meas_here; %beta distribution parameter
                beta = fp.test_max - k_meas_here + 1; %beta distribution parameter
                temp_beta_draws_by_measure(i,r,k_meas_here) = betainv(fp.beta_draws(i,r),alpha,beta); %draw from beta
            end
        end
    end
    save([output_dir 'temp_beta_draws'],'temp_beta_draws_by_measure');
end

% load temp_beta_draws temp_beta_draws_by_measure;
load([output_dir 'temp_beta_draws'],'temp_beta_draws_by_measure');
fp.beta_draws_by_measure = temp_beta_draws_by_measure;

% Note: we only need to do this step once, so can turn off the dummy
% "do_beta_draws" afterwards.

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%% Calculate data moments
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

disp('Calculating data moments')
mom = build_moments(data,col_long,fp,1);
data_mom = mom.moments_all;

% also save the indexes (useful in compare_data_sim.m)
fp.index_moments_1 = mom.index_moments_1; % time inputs, wages and labor supply choices grouped by 4 child age bins
fp.index_moments_2 = mom.index_moments_2; % LW score by child age (per year)
fp.index_moments_3 = mom.index_moments_3; % parental wages by educ/age bins
fp.index_moments_LW = mom.index_moments_LW; % moments that capture the input/output process (unconditional means)
fp.index_moments_tau1 = mom.index_moments_tau1; % correlations between tau1 and LW measures
fp.index_moments_tau2 = mom.index_moments_tau2; % correlations between tau2 and LW measures
fp.index_moments_tauc = mom.index_moments_tauc; % correlations between tau_c and LW measures
fp.index_moments_educ = mom.index_moments_educ; % relate education to test scores and time inputs
fp.index_moments_CCT = mom.index_moments_CCT; % relate CCT use (as observed through allowances) to child age and father's education
fp.index_moments_external = mom.index_moments_external; % external moments: fraction of child expenditures + CCT use

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%% Bootstrap weighting matrix
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

% Run block bootstrap over fp.N number of households
if fp.do_boot == 1
    display('Calculating bootstrapped data moment weight matrix - ONLY NEED TO DO THIS ONCE');
    tic
    nr_varying = length(mom.moments_varying);
    boot_moments = zeros(nr_varying,fp.B); % pre-define

    %random number draws for bootstrap
    %this creates fp.N x fp.B matrix of random data integers between 1 and fp.N, with replacement
    rand('state',1131);
    draws = randi(fp.N,fp.N,fp.B);

    for b = 1:fp.B
        %randomly re-draw with replacement from original data
        fp.rand_order = draws(:,b); % fp.N by 1 vector of integers (with replacement)
        boot_data = [];
        for q = 1:fp.N
            %block for mother with id == rand_order(q)
            boot_data = [boot_data; data( (data(:,col_long.id) == fp.rand_order(q)),:)];
        end
        %calculate moments for the current data draw
        temp = build_moments(boot_data,col_long,fp,1); % using bootstrapped data now! See beginning of moment_funct.m
        boot_moments(:,b) = temp.moments_varying;
    end

    % Which elements of moment vectors have any variation?
    %--some moments defined may have NaN since out of age range, etc.
    temp = std(boot_moments,[],2);
    moments_with_variation = (temp > 0); % save the indices of the used moments
    boot_moments = boot_moments(moments_with_variation,:);
    [numb_moments,fp.B] = size(boot_moments);
    % Verify:
    %     sum(moments_with_variation==1)
    %     sum(moments_with_variation==0)

    %create bootstrapped covariance matrix, off-diagonal elements set to zero
    cov = zeros(numb_moments,1);
    for b = 1:fp.B
        diff = boot_moments(:,b) - mean(boot_moments,2);
        cov = cov + (1/fp.B).*diff.^2;
    end
    est_wt = inv( diag(cov) );

    % Add external moments with weights such that their contribution to the
    % goal function is large enough and comparable to the contribution of
    % the other moments:
    est_wt(numb_moments+1,numb_moments+1) = 0.010; % average level of child expenditures: x+e
    est_wt(numb_moments+2,numb_moments+2) = 5000; % average fraction of child expenditures: (x+e)/Y
    for ii=3:10 % number of external moments related to child betas
        est_wt(numb_moments+ii,numb_moments+ii) = 10000; % child discount factor: 8 moments from Steinberg (2 age categories and 3 education types+all)
    end

    % Manually increase weight on other important moments:
    est_wt(fp.index_moments_educ,fp.index_moments_educ) = 10 * est_wt(fp.index_moments_educ,fp.index_moments_educ);

    % June 2018: artificially increase weight on some important moments:
    % 1) on all time inputs and labor supply means:
    tmp = fp.index_moments_1;
    % 2) on raw test scores:
    est_wt(fp.index_moments_2,fp.index_moments_2) = 10 * est_wt(fp.index_moments_2,fp.index_moments_2);
    % 3) on CCT use:
    est_wt(fp.index_moments_CCT,fp.index_moments_CCT) = 5 * est_wt(fp.index_moments_CCT,fp.index_moments_CCT);

    save([output_dir 'temp_weight_matrix'],'est_wt');
    save([output_dir 'temp_variation'],'moments_with_variation');
    toc
end

load([output_dir 'temp_weight_matrix'],'est_wt');
fp.est_wt = est_wt;
load([output_dir 'temp_variation'],'moments_with_variation');
fp.moments_with_variation = moments_with_variation;

if any(moments_with_variation==0)
    error('Warning: some moments which should vary have no variation!')
    % If this happens, either delete the moment or set its weight
    % exogenously equal to 0
end

% default setting: use all the moments in estimation
fp.moments_used = ones(length(data_mom),1);

fp.moments_used(fp.index_moments_1)     = 1;     % time inputs, wages and labor supply choices grouped by 4 child age bins
fp.moments_used(fp.index_moments_2)     = 1;     % mean LW score (levels) by child age (per age-year)
fp.moments_used(fp.index_moments_3)     = 1;     % parental wages by educ/age bins
fp.moments_used(fp.index_moments_LW)    = 1;     % mean/stdev/corr of LW scores (levels and changes) by age bin
fp.moments_used(fp.index_moments_tau1)  = 1;     % correlations between tau1 and LW measures
fp.moments_used(fp.index_moments_tau2)  = 1;     % correlations between tau2 and LW measures
fp.moments_used(fp.index_moments_tauc)  = 1;     % correlations between tau_c and LW measures
fp.moments_used(fp.index_moments_educ)  = 1;     % correlations between educ, LW and time inputs
fp.moments_used(fp.index_moments_CCT)   = 1;     % moments relating CCT use to parental education/time inputs/test scores
fp.moments_used(fp.index_moments_external) = 1;  % external moments on child expenditures (based on CRC report) and child discount factors (based on Steinberg data)

for i = 1:length(data_mom)
    if fp.moments_used(i) == 0
        est_wt(i,i) = 0;
    end
end
fp.est_wt = est_wt;

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%% Create initial guesses for all model parameters
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

% Note: if running the code for the first time, the code needs an initial
% guess for (1) the main model parameters defined in fp.par, and (2) the non-labor income
% process parameters. 

% For simplicity, we provide the initial guesses equal to the final estimates, and save them
% in their appropriate folders, so you don't need to re-estimate the whole
% model in order to run the entire code. 

% 1) Initial guess for the 43 main model parameters defined in fp.par (needs to be a
% vector with 43 guesses, one for each parameter)
param_est = [0.3502;  0.7304;  0.7071;  2.4320;  0.3066; -1.4084; -0.1209;  0.0297; -2.2555; -0.1050;
             0.0382; -6.5982;  0.2705; -7.1541;  0.0717; -0.2537;  0.0050; -1.0909; -2.2472;  0.6443;
             0.5410;  0.9411;  1.6775; -4.4354; -4.3860; -2.4319; -2.5686; -0.8233; -0.4310;  1.8294;
            -5.7136; -0.7884;  0.0165; -0.4205; -0.0374; -2.1779;  0.0113; 10.8308; -0.2776; -0.4580;
             2.5983; -0.1770; -0.2391]'; % 1 by 43 vector of (final) estimates
save([output_dir 'temp_estimates'],'param_est');

% 2) Initial guess for the 15 non-labor income parameters defined in fp.par (needs to be a
% vector with 15 guesses, one for each parameter
NLI_par = [-5.0117; -10.9003; 1.0001; -9.9766; -2.1635;
            0.9987; -1.4382; 2.2054; -12.9741; 1.0000;
            1.0032; -3.0981; 1.0000;  0.9993;  0.2615]'; % 1 by 15 vector of (final) estimates
save([output_dir 'temp_estimates_NLI_process'],'NLI_par');

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%% Estimate (truncated, non-negative) non-labor income process
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

output = run_income_process(data,col_long,fp,output_dir); % see m-file

fp.NLI_par = output.NLI_param;% transformed parameters
fp.NLI_sim_data = output.NLI_sim_data; % matrix of simulated data
fp.NLI_sim_data_3d = output.NLI_sim_data_3d; % matrix of simulated data in 3d format
fp.NLI_sim = fp.NLI_sim_data(:,col_long.nonlabor_income); % this is used in write_output_paper.m
fp.NLI_data_mom = output.NLI_data_mom;
fp.NLI_sim_mom = output.NLI_sim_mom;
load([output_dir 'temp_NLI_weight_matrix'],'NLI_wt_matrix');
fp.NLI_wt_matrix = NLI_wt_matrix;
load([output_dir 'temp_NLI_variation'],'NLI_moments_with_variation');
fp.NLI_moments_with_variation = NLI_moments_with_variation;

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%% Define model primitive parameters
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

% define the indices for each primitive parameter of the model

% Instantaneous utility functions
par.alpha1 = 1;     % mom leisure l_1
par.alpha2 = 2;     % dad leisure l_2
par.alpha3 = 3;     % parents private consumption c
par.lambda1 = 4;    % child leisure l_c
par.lambda2 = 5;    % child private consumption x

end_ind = 5;

% Cognitive skill technology parameters
par.delta1_intercept = end_ind+1;   % mom time - intercept
par.delta1_slope = end_ind+2;       % mom time - age slope
par.delta1_educ = end_ind+3;        % mom time - education slope
par.delta2_intercept = end_ind+4;   % dad time - intercept
par.delta2_slope = end_ind+5;       % dad time - age slope
par.delta2_educ = end_ind+6;        % dad time - education slope
par.delta3_intercept = end_ind+7;   % child self-investment time - intercept
par.delta3_slope = end_ind+8;       % child self-investment time - age slope
par.delta4_intercept = end_ind+9;   % child expenditures - intercept
par.delta4_slope = end_ind+10;      % child expenditures - age slope
par.delta5_intercept = end_ind+11;  % lagged skills - intercept
par.delta5_slope = end_ind+12;      % lagged skills - age slope
% TFP process: logistic shape with 4 parameters L, L0, k, t0
par.L = end_ind + 13;               % TFP - L
par.L0 = end_ind + 14;              % TFP - L0
par.k = end_ind + 15;               % TFP - k
par.t0 = end_ind + 16;              % TFP - t0

end_ind = end_ind + 16;

% Parental hourly wage offers
par.beta0_mom = end_ind+1; % mom hourly wage - intercept
par.beta0_dad = end_ind+2; % dad hourly wage - intercept
par.beta1_mom = end_ind+3; % mom - return to experience (age)
par.beta1_dad = end_ind+4; % dad - return to experience
par.beta2_mom = end_ind+5; % mom - return to schooling
par.beta2_dad = end_ind+6; % dad - return to schooling
par.sigma_mom = end_ind+7; % mom - standard deviation of wage shocks (epsilons)
par.sigma_dad = end_ind+8; % dad - standard deviation of wage shocks
par.wage_corr = end_ind+9; % correlation in epsilon draws for wages

end_ind = end_ind + 9;

% Parameter governing the distribution of internal CCT costs
% Assume it is the mean of an exponential distribution, and keep cost draws fixed within households
par.cost_mean = end_ind + 1;

end_ind = end_ind + 1;

% Parameters of the discretized Markov process for child discount factor

% Bottom and Middle state: parametrize probability to go up one level
par.b0_up = end_ind+1; % bottom state, intercept
par.b1_up = end_ind+2; % bottom state, age slope
par.b2_up = end_ind+3; % bottom state, CCT slope
par.b3_up = end_ind+4; % bottom state, interaction age*CCT
% Middle and Top state: parametrize probability to go down one level
par.b0_down = end_ind+5; % top state, intercept
par.b1_down = end_ind+6; % top state, age slope
% Note: we assume symmetry to reduce parameter space
%     par.b2_down = end_ind+7; % top state, CCT slope
%     par.b3_down = end_ind+8; % top state, interaction age*CCT

end_ind = end_ind + 6;

% Parameters of the initial conditions for beta_{c,t0}, where t0 = age
% in 1997 when we first observe household in CDS
% We assume Pr(beta^j) = logit(nu^j_0 + nu^j_1 * t0 + nu^j_2 * dad_educ) for j = 1,2
par.nu10 = end_ind+1; % bottom state, intercept
par.nu11 = end_ind+2; % bottom state, age slope
par.nu12 = end_ind+3; % bottom state, educ slope
par.nu20 = end_ind+4; % middle state, intercept
par.nu21 = end_ind+5; % middle state, age slope
par.nu22 = end_ind+6; % middle state, educ slope

% save them for the estimation procedure below; fminsearch does not accept
% structures as inputs; "param" has to be a regular vector;
fp.par = par; % final model has 43 parameters to be estimated jointly (excluding the exogenous processes like non-labor income and parental discount factors)

% Optionally: Create some additional graphs and tables with summary statistics for the paper (see Appendix E)
graphs_intro(data,col_long,fp,paper_dir)

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%% Estimate parameters of baseline model
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

if fp.estimate == 1
    % Load initial parameter vector
    load([output_dir 'temp_estimates'],'param_est');
    param_start = param_est;
    disp('Estimating model parameters using fminsearch...')
    tic
    fp.options_est
    [param_est,obj_eval_est,exitflag] = fminsearch_matt(@(param) ...
        build_obj(param,fp,col_long,data,data_mom), param_start, fp.options_est);

    exitflag
    obj_eval_est

    % Save results (overwrite previous estimates or initial guess)
    save([output_dir 'temp_estimates'],'param_est');
    toc
end % if

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%% Simulate data at current parameter estimates
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

if fp.simulate == 1
    % Load estimates for which we want to simulate data
    load([output_dir 'temp_estimates'],'param_est');

    disp('Simulating data and moments at current parameter estimates...');
    tic
    fp.estimate = 0; % no estimation: report full results
    obj_eval = build_obj(param_est,fp,col_long,data,data_mom);

    % Unpack relevant objects
    sim_data = obj_eval.sim_data;           % simulated data
    param_vector = obj_eval.param_vector;   % parameter estimates
    sim_mom = obj_eval.sim_mom;             % simulated moments
    toc

    % Create relevant tables and figures to put in final paper
    write_output_paper(data, sim_data, col_long, fp, param_vector, obj_eval, output_dir, paper_dir, data_mom, sim_mom)

    disp('Objective function value at current estimates:')
    disp(obj_eval.obj_value)
end

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%% Calculate Standard Errors using bootstrap method
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

if fp.do_boot_se == 1
    % STEPS
    % 0) Draw new vectors of shocks
    % 1) Draw a random sample of N households with replacement
    % 2) Estimate the NLI process for these households and generate random income draws according to these parameters
    % 3) Use these income draws (and the SAME random sample of households) to estimate the other parameters of the model
    % 4) Save all 41+9 parameters in a matrix
    % 5) Do this B_SE = 50 times
    % 6) Calculate the SEs by taking the standard deviation across all 50 draws

    % Step 0: Draw new vectors of shocks

    % Wage draws for bootstrapping:
    rand('state',942);
    fp.wage_draws_BS = norminv(rand(fp.N,fp.R,fp.M+1,3,fp.B_SE));
    % yields standard normal numbers (between -inf and +inf)
    % Now we center them around 0 and 1 (useful if R is small)
    for k = 1:3
        for b = 1:fp.B_SE
            temp_vec = fp.wage_draws_BS(:,:,:,k,b);
            temp_vec = temp_vec(:);
            temp1 = mean(temp_vec);
            temp2 = std(temp_vec,1);
            fp.wage_draws_BS(:,:,:,k,b) = (fp.wage_draws_BS(:,:,:,k,b)-temp1)/temp2;
        end
    end

    % Also draw some shocks for the I=0 or I>0 process (logit: see NLI_sim_funct)
    rand('state',1939);
    fp.NLI_unif_draws_BS = rand(fp.N,fp.R,fp.M+1,fp.B_SE);

    % 2) uniform draws for child quality, based on assumption that the initial prior
    % distribution of "p" (see also REStud section 3.3) is uniform over [0,1]
    rand('state',1007);
    fp.beta_draws_BS = rand(fp.N,fp.R,fp.B_SE);

    % 3) draws for test scores, see below
    rand('state',1261);
    fp.score_draws_BS = rand(fp.N,fp.R,fp.M+1,fp.B_SE);

    % 4) draws for parental discount factors
    rand('state',9413);
    fp.betap_draws_BS = rand(fp.N,fp.R,fp.B_SE);

    % 5) draw random uniform numbers to determine periodic markov transitions
    % for child's discount factor
    rand('state',1498156);
    fp.n_draws_BS = rand(fp.N,fp.R,fp.M,fp.B_SE);

    % Draw random uniform numbers to determine initial draws
    rand('state',17456);
    fp.betac_init_unifdraws_BS = rand(fp.N,fp.R,fp.B_SE);

    % 6) generate 50 "random" seed numbers to generate zeta cost draws in build_obj.m.
    fp.zeta_cost_seeds = [16984:5:(16984+5*(fp.B_SE-1))];

    % Pre-draw initial latent k level drawn from beta distribution
    %see also DFW REStud 2014, section 3.3

    disp('Drawing initial latent k values, bootstrap');

    % The function k = betainv used below gives the inverse of the beta cdf, i.e. for a beta cdf with given
    % parameters alpha and beta (which depend on the "observed" test score, k*), a value less than or equal to k will occur
    % with probability p = fp.beta_draws(i,r) = the prior.

    % Thus, temp_beta_draws_by_measure contains the latent child quality k
    % given an "observed" but noisy score of k_meas_here (= k*), assuming a uniform
    % prior p. This will be monotonically increasing in k*, and always be
    % between 0 and 1 (since this is just "latent" quality).
    % For every (i,r) iteration, fp.beta_draw(i,r) will be the uniform prior p
    % This p will be updated for every possible observed test score k*

    do_again = 0; % NOTE: set dummy equal to 1 only if running code for the first time, then set it to 0 to save time

    if do_again == 1
        tic
        beta_draws_by_measure_BS = NaN(fp.N,fp.R,fp.test_max,fp.B_SE);
        for i = 1:fp.N
            for r = 1:fp.R
                for k_meas_here = 1:1:fp.test_max
                    for b = 1:fp.B_SE
                        alpha = 1 + k_meas_here; %beta distribution parameter
                        beta = fp.test_max - k_meas_here + 1; %beta distribution parameter
                        beta_draws_by_measure_BS(i,r,k_meas_here,b) = betainv(fp.beta_draws_BS(i,r,b),alpha,beta); %draw from beta
                    end
                end
            end
        end
        toc
        save([output_dir 'temp_beta_draws_BS'],'beta_draws_by_measure_BS');
    elseif do_again == 0
        load([output_dir 'temp_beta_draws_BS'],'beta_draws_by_measure_BS');
    end

    % Step 1: Draw B_SE random samples of size N with replacement
    rand('state',1203);
    draws = randi(fp.N,fp.N,fp.B_SE); % use same draws for NLI and main model!
    % This creates an fp.N x fp.B_SE matrix of random integers

    % Generate randomly drawn households with replacement from original data
    boot_data_all = NaN(size(data,1),size(data,2),fp.B_SE);
    fp.beta_p_mat_boot = NaN(fp.N,fp.R,fp.B_SE);
    for b = 1:fp.B_SE
        fp.b_value = b; % remember the b-value to load the correct simulated non-labor incomes later on!

        % STEP 1: randomly re-draw households with replacement from original data
        fp.rand_order = draws(:,b);
        boot_data = [];
        for q = 1:fp.N
            % find the data block for the mother with id == rand_order(q)
            boot_data = [boot_data; data( (data(:,col_long.id) == fp.rand_order(q)),:)];
        end
        boot_data_all(:,:,b) = boot_data; % will be useful for parental discount factor draws

        if fp.run_model >= 2
            % Generate randomized parental discount factors conditional on parental education
            % load random number draws for this bootstrap round
            fp.beta_p_mat_boot(:,:,b) = sim_init_betap(boot_data,fp,col_long,fp.betap_draws_BS(:,:,b));
        end
    end

    % Step 2: Estimate the NLI process for each random sample and generate random income draws according to these parameters
    display('Starting bootstrap SE procedure for non-labor income process...')
    tic
    % Load initial parameter guess for estimation within each bootstrap round
    load([output_dir 'temp_estimates_NLI_process'],'NLI_par');
    %note: these are untransformed estimates, whereas the actual estimates we are interested in are transformed

    redo_BS_NLI = 1; % NOTE: set dummy equal to 1 only if running code for the first time, then set it to 0 to save time

    if redo_BS_NLI == 1
        param_est_boot_matrix_NLI = NaN(15,fp.B_SE); % save the 9 transformed NLI parameters for each bootstrap sample here = the ones we want to take SEs of
        sim_boot_matrix_NLI = NaN(fp.N*(fp.M+1)*fp.R,fp.B_SE); % save the simulated NLI for each bootstrap sample here
        sim_boot_matrix_NLI_3d = NaN(fp.M+1,fp.N,fp.R,fp.B_SE); % save the simulated NLI for each bootstrap sample here in 3D format
        boot_moments_mat_NLI = NaN(57,fp.B_SE,2); % contains moments x B_SE x 2, to save both the data and the simulated moments

        for b = 1:fp.B_SE
            display('--------------------------------------------------');
            display(['Estimating NLI process for bootstrap sample ',num2str(b),' of ',num2str(fp.B_SE)])

            % Load random household sample for bootstrap draw b
            boot_data = boot_data_all(:,:,b);

            % Random draws for this bootstrap round:
            fp.wage_draws = fp.wage_draws_BS(:,:,:,:,b);
            fp.NLI_unif_draws = fp.NLI_unif_draws_BS(:,:,:,b);

            % STEP 2: re-compute data moments
            boot_data_moments = NLI_moment_funct(boot_data,col_long);

            % STEP 3: re-estimate model, starting from current guess
            estimate_NLI_process = 1;
            fp.doing_bootstrap_se_NLI = 1; %use id_vec for bootstrap observations
            [param_est_boot,obj_eval_est,exitflag] = fminsearch_matt(@(param) ...
                NLI_obj_funct(param,fp,boot_data,col_long,boot_data_moments,estimate_NLI_process), ...
                NLI_par,fp.options_est_NLI);

            % STEP 4: get back transformed optimal parameters and simulated NLIs for this bootstrap sample
            estimate_NLI_process = 0;
            obj_eval = NLI_obj_funct(param_est_boot,fp,boot_data,col_long,boot_data_moments,estimate_NLI_process);

            % save data and simulated moments for each bootstrap round
            tmp = obj_eval.NLI_data_mom;
            boot_moments_mat_NLI(:,b,1) = tmp;
            tmp = obj_eval.NLI_sim_mom;
            boot_moments_mat_NLI(:,b,2) = tmp;

            % TRANSFORMED PARAMETERS ( = the ones we want to know the SEs of)
            param_est_boot_matrix_NLI(:,b) = obj_eval.NLI_param;
            % record simulated NLI for this bootstrap sample
            sim_boot_matrix_NLI(:,b) = obj_eval.NLI_sim_data(:,col_long.nonlabor_income);
            sim_boot_matrix_NLI_3d(:,:,:,b) = obj_eval.NLI_sim_data_3d;

            save([output_dir 'temp_param_est_boot_matrix_NLI'],'param_est_boot_matrix_NLI');
            fp.param_est_boot_matrix_NLI = param_est_boot_matrix_NLI;

            save([output_dir 'temp_sim_bootstrap_NLI'],'sim_boot_matrix_NLI');
            fp.sim_boot_matrix_NLI = sim_boot_matrix_NLI;

            save([output_dir 'temp_sim_bootstrap_NLI_3d'],'sim_boot_matrix_NLI_3d');
            fp.sim_boot_matrix_NLI_3d = sim_boot_matrix_NLI_3d;

            % Save the moments after each loop as well
            save([output_dir 'temp_boot_moments_NLI'],'boot_moments_mat_NLI');
            fp.boot_moments_mat_NLI = boot_moments_mat_NLI;
        end
    elseif redo_BS_NLI == 0
        % Load the correct matrices from memory
        load([output_dir 'temp_param_est_boot_matrix_NLI'],'param_est_boot_matrix_NLI');
        fp.param_est_boot_matrix_NLI = param_est_boot_matrix_NLI;
        load([output_dir 'temp_sim_bootstrap_NLI'],'sim_boot_matrix_NLI');
        fp.sim_boot_matrix_NLI = sim_boot_matrix_NLI;
        load([output_dir 'temp_sim_bootstrap_NLI_3d'],'sim_boot_matrix_NLI_3d');
        fp.sim_boot_matrix_NLI_3d = sim_boot_matrix_NLI_3d;
        load([output_dir 'temp_boot_moments_NLI'],'boot_moments_mat_NLI');
        fp.boot_moments_mat_NLI = boot_moments_mat_NLI;
    end % if
    toc
    fp.doing_bootstrap_se_NLI = 0;

    % Step 3: Estimate the other parameters of the model
    % Importantly: We must use the non-labor income draws generated above and the SAME
    % random sample of households in each loop b = 1:B_SE (see Step 1)

    display('Starting bootstrap SE procedure for the model...')
    tic

    % Load initial parameter guess for estimation within each bootstrap round
    load([output_dir 'temp_estimates'],'param_est');

    for b = 1:fp.B_SE
        fp.b_value = b; % remember the b-value to load the correct simulated non-labor incomes later on!
        display(['Estimating model for bootstrap sample ',num2str(b),' of ',num2str(fp.B_SE)])
        % STEP 1: randomly re-draw households with replacement from original data
        fp.rand_order = draws(:,b);
        boot_data = [];
        for q = 1:fp.N
            % find the data block for the mother with id == rand_order(q)
            boot_data = [boot_data; data( (data(:,col_long.id) == fp.rand_order(q)),:)];
        end

        % Load shocks for this bootstrap round
        fp.wage_draws = fp.wage_draws_BS(:,:,:,:,b);
        fp.score_draws = fp.score_draws_BS(:,:,:,b);
        if fp.run_model == 1 % with preference heterogeneity
            fp.pref_draws = fp.pref_draws_BS(:,:,:,b);
        end
        fp.beta_draws_by_measure = beta_draws_by_measure_BS(:,:,:,b);
        if fp.run_model == 3
            fp.n_draws = fp.n_draws_BS(:,:,:,b);
            % Draw random uniform numbers to determine initial draws
            fp.betac_init_unifdraws = fp.betac_init_unifdraws_BS(:,:,b);
        end

        % Randomized parental discount factors for this round:
        fp.beta_p_mat = fp.beta_p_mat_boot(:,:,b);

        % STEP 2: re-compute data moments
        temp = build_moments(boot_data,col_long,fp,1);
        boot_data_moments = temp.moments_all;

        % STEP 3: re-estimate model, starting from current guess
        %      MAKE SURE TO PASS ON THE CORRECT NLI HERE, simulated using the NLI estimates for this specific bootstrap sample (see above)
        % also see beginning of sim_funct.m

        fp.estimate = 1;
        fp.doing_bootstrap_se = 1; %use id_vec for bootstrap observations
        [param_est_boot,obj_eval_est,exitflag] = fminsearch_matt(@(param) ...
            build_obj(param,fp,col_long,boot_data,boot_data_moments), ...
            param_est,fp.options_est_BSE);

        % STEP 4: get back transformed parameters
        fp.estimate = 0;
        obj_eval = build_obj(param_est_boot,fp,col_long,boot_data,boot_data_moments);

        % load results calculated so far (again, if we split up bootstrap
        % reps across several matlab terminals, so we load the most recently
        % saved file before adding each completed rep)
        load([output_dir 'temp_param_bootstrap'],'param_est_boot_matrix');
        fp.param_est_boot_matrix = param_est_boot_matrix;

        load([output_dir 'temp_boot_moments'],'boot_moments_mat');
        fp.boot_moments_mat = boot_moments_mat;

        % save data and simulated moments for each bootstrap round
        boot_moments_mat(:,b,1) = boot_data_moments;
        tmp = obj_eval.sim_mom;
        boot_moments_mat(:,b,2) = tmp;

        % STEP 5: record parameter estimates (see end of obj_funct.m)
        param_est_boot_matrix(:,b) = obj_eval.param_vector;
        % save results after each loop
        save([output_dir 'temp_param_bootstrap'],'param_est_boot_matrix');
        fp.param_est_boot_matrix = param_est_boot_matrix;
        % Save the moments after each loop as well
        save([output_dir 'temp_boot_moments'],'boot_moments_mat');
        fp.boot_moments_mat = boot_moments_mat;
    end

    % Save the final matrix somewhere
    save([output_dir 'temp_param_bootstrap'],'param_est_boot_matrix');
    fp.param_est_boot_matrix = param_est_boot_matrix;

    fp.doing_bootstrap_se = 0;
    toc
end

%% Comparative Statics 1: vary CCT costs
if fp.compstat_CCT == 1
    disp('Now running "COMP STAT for CCT PARAMETERS" code...');
    % First load the fixed set of estimates for which we want to run the models
    load([output_dir 'temp_estimates'],'param_est');
    param_est_fixed = param_est;

    nr_models = 5; % compare baseline model to 4 alternatives

    sim_data_all = NaN((fp.M+1)*fp.R*fp.N, fp.tot_cols, nr_models); % pre-allocate all simulated data
    for mm = 1:nr_models

        if mm == 1
            disp('(1) Baseline: Simulating data for baseline model ');
            param_est_mm = param_est_fixed; % load original parameters

        elseif mm == 2
            disp('(2) Simulating data for model with infinite CCT cost: kappa = infinite');
            param_est_mm = param_est_fixed; % load original parameters
            param_est_mm(fp.par.cost_mean) = 50; % arbitrarily large so that exp(cost_mean) = +Infinity

        elseif mm == 3
            disp('(3) Simulating data for model with zero CCT cost: kappa = 0');
            param_est_mm = param_est_fixed; % load original parameters
            param_est_mm(fp.par.cost_mean) = -1e10; % arbitrarily negative so that exp(cost_mean) = 0

        elseif mm == 4
            disp('(4) Simulating data for model with exogenous child betas: b2_up = b3_up = 0');
            param_est_mm = param_est_fixed; % load original parameters
            param_est_mm(fp.par.b2_up) = 0; % CCT intercept parameter in Markov process
            param_est_mm(fp.par.b3_up) = 0; % CCT age slope parameter in Markov process

        elseif mm == 5
            disp('(5) Simulating data for model with zero CCT cost and exogenous child betas: kappa = b2_up = b3_up = 0');
            param_est_mm = param_est_fixed; % load original parameters
            param_est_mm(fp.par.cost_mean) = -1e10; % arbitrarily negative so that exp(cost_mean) = 0
            param_est_mm(fp.par.b2_up) = 0; % CCT intercept parameter in Markov process
            param_est_mm(fp.par.b3_up) = 0; % CCT age slope parameter in Markov process
        end

        tic
        fp.estimate = 0; % no estimation: report full results
        obj_eval = build_obj(param_est_mm,fp,col_long,data,data_mom);
        disp('Objective function value at current estimates:')
        disp(obj_eval.obj_value)
        sim_data = obj_eval.sim_data; % save simulated data
        toc

        % save "corrected" simulated data in the big matrix
        sim_data_all(:,:,mm) = sim_data;
        save([output_dir 'temp_compstat_CCT'],'sim_data_all');  % transfer of 500$ at age t=15
    end % i loop

    % Create table with output for paper
    tables_compstat_CCT(col_long,fp,output_dir)
end

%% Comparative Statics 2: shut down SES heterogeneity in wages/tech/time prefs

if fp.compstat_SES == 1

    disp('Now running "COMP STAT for SES PARAMETERS" code...');
    load([output_dir 'temp_estimates'],'param_est');

    nr_models = 5; % compare baseline model to 4 alternatives
    sim_data_all = NaN((fp.M+1)*fp.R*fp.N, fp.tot_cols, nr_models); % pre-allocate all simulated data

    fp.s1_median = median(data(:,col_long.mom_educ));
    fp.s2_median = median(data(:,col_long.dad_educ));
    fp.age1_median = median(data(:,col_long.mom_age));
    fp.age2_median = median(data(:,col_long.dad_age));

    for mm = 1:nr_models
        if mm == 1
            disp('(1) BASELINE 1 = allow for full heterogeneity in betas, technology and wages');
            fp.compstat_age_homwages = 0;   % equal to 0 if parental wages and NLI are allowed to depend on parental age
            fp.compstat_SES_homwages = 0;   % equal to 0 if parental wages and NLI are allowed to depend on parental education
            fp.compstat_SES_homtech = 0;    % equal to 0 if parental time productivities are allowed to depend on parental education
            fp.compstat_SES_homprefs = 0;   % equal to 0 if the child's initial discount factor distribution AND the parental discount factor distribution are allowed to depend on the father's education

        elseif mm == 2
            disp('(2) BASELINE 2 = shut down age-dependence in wages/NLI, but allow for SES-heterogeneity in betas, technology and wages');
            % maybe a better point of comparison, since it gets rid of age gaps between low and high SES people.
            fp.compstat_age_homwages = 1;   % equal to 0 if parental wages and NLI are allowed to depend on parental age
            fp.compstat_SES_homwages = 0;   % equal to 0 if parental wages and NLI are allowed to depend on parental education
            fp.compstat_SES_homtech = 0;    % equal to 0 if parental time productivities are allowed to depend on parental education
            fp.compstat_SES_homprefs = 0;   % equal to 0 if the child's initial discount factor distribution AND the parental discount factor distribution are allowed to depend on the father's education

        elseif mm == 3
            disp('(3) Alternative 1: shut down heterogeneity in wages/NLI, allow for SES-dependence in betas and technology');
            fp.compstat_age_homwages = 1;   % equal to 0 if parental wages and NLI are allowed to depend on parental age
            fp.compstat_SES_homwages = 1;   % equal to 0 if parental wages and NLI are allowed to depend on parental education
            fp.compstat_SES_homtech = 0;    % equal to 0 if parental time productivities are allowed to depend on parental education
            fp.compstat_SES_homprefs = 0;   % equal to 0 if the child's initial discount factor distribution AND the parental discount factor distribution are allowed to depend on the father's education

        elseif mm == 4
            disp('(4) Alternative 2: shut down heterogeneity in wages/NLI and tech, allow for SES-dependence in betas');
            fp.compstat_age_homwages = 1;   % equal to 0 if parental wages and NLI are allowed to depend on parental age
            fp.compstat_SES_homwages = 1;   % equal to 0 if parental wages and NLI are allowed to depend on parental education
            fp.compstat_SES_homtech = 1;    % equal to 0 if parental time productivities are allowed to depend on parental education
            fp.compstat_SES_homprefs = 0;   % equal to 0 if the child's initial discount factor distribution AND the parental discount factor distribution are allowed to depend on the father's education

        elseif mm == 5
            disp('(5) Alternative: Shut down all SES-dependence');
            fp.compstat_age_homwages = 1;   % equal to 0 if parental wages and NLI are allowed to depend on parental age
            fp.compstat_SES_homwages = 1;   % equal to 0 if parental wages and NLI are allowed to depend on parental education
            fp.compstat_SES_homtech = 1;    % equal to 0 if parental time productivities are allowed to depend on parental education
            fp.compstat_SES_homprefs = 1;   % equal to 0 if the child's initial discount factor distribution AND the parental discount factor distribution are allowed to depend on the father's education

        end

        % Re-simulate parental discount factors (conditional on fp.compstat_SES_homprefs: default value is 0)
        fp.beta_p_mat = sim_init_betap(data,fp,col_long,betap_draws);

        % Re-simulate household non-labor income draws (conditional on fp.compstat_age_homwages AND fp.compstat_SES_homwages)
        output = run_income_process(data,col_long,fp,output_dir); % see m-file
        fp.NLI_sim_data_3d = output.NLI_sim_data_3d; % matrix of simulated data in 3d format - this is what sim_choices_new is using to load NLI figures for each hh/sim/year

        tic
        fp.estimate = 0; % no estimation: report full results
        obj_eval = build_obj(param_est,fp,col_long,data,data_mom);
        disp('Objective function value at current estimates:')
        disp(obj_eval.obj_value)
        sim_data = obj_eval.sim_data; % save simulated data
        toc
        % Save simulated data in the big matrix
        sim_data_all(:,:,mm) = sim_data;
    end % mm loop

    save([output_dir 'temp_compstat_SES'],'sim_data_all');  % transfer of 500$ at age t=15

    % Create table with output for paper
    tables_compstat_SES(col_long,fp,output_dir)
end

%% Save everything

save([output_dir 'saved_workspace']);
